





#############
#############
#### Distributional Consequences of Policies for Electric Heat Conversion ####
####. 2_ Analysis.R
#############
#############




# Unless otherwise noted, analysis is agnostic as to package version

require(data.table) # version 1.14.8
require(sf)
require(raster)
require(tidyverse)
require(tidycensus) # version 1.5
require(tigris)
require(mapview)
require(parallel)
require(snow)
require(doSNOW)
require(here)
require(stringr)
require(readr)
require(readxl)
require(scales)
require(tictoc)
require(lubridate) # version 1.9.3
require(zoo)
require(httr)
require(jsonlite)
require(usdata)
require(forcats)
require(fixest)
require(scales)
require(raster)
require(devtools)
require(patchwork)
require(fastDummies)
require(glmnet)
require(caret)
require(ranger)
require(fixest) # version 0.11.1
require(broom)
require(beepr)
require(modelsummary)
require(ggrepel)




UseGPSC = TRUE  # Set whether or not to use Georgia Pub. Services Comm. survey data (default to TRUE)
reintersect = T

weighted.mean = stats::weighted.mean  # some issues with raster/terra's weighted.mean
options(tigris_use_cache = TRUE)
sf::sf_use_s2(FALSE) 
eia.key = # <Add your eia key here>
blsAPIkey = # <Add your BLS API key here>
options(warn=1)  # wtf - max(c(NA,NA), na.rm=T) was suddenly becoming fatal (error, not warning + -Inf)

year = lubridate::year #--> to ensure that year() and quarter() are from lubridate; there is overlap / masking
quarter = lubridate::quarter

dMode<-function(v){
  uniqv<-unique(v)[unique(v)!='wtf']
  uniqv[which.max(tabulate(match(v, uniqv)))]
}

jStataPrep<-function(x){
  colnames(x) = gsub(colnames(x), pattern='\\.', replacement='_')
  if(!'sf' %in% class(x)) return(x)
  return(x %>% st_set_geometry(NULL))
}


##########
### Load data
##########

# Set BASE to Code Repository folder

BASE = file.path('YOURPATHHERE/Code Repository')
DATA.PROC = file.path(BASE, 'Data_Proc')
DATA.ARCHIVE = file.path(BASE, 'Data_Archive')
FINAL.OUTPUT = file.path(BASE, 'Final_Output')
if(!UseGPSC) FINAL.OUTPUT = file.path(FINAL.OUTPUT, 'GeorgiaAlt'); dir.create(FINAL.OUTPUT)



CPI = read_csv(file.path(DATA.PROC, 'CPI.csv')) %>%
  dplyr::select(yq, ym, CPI, CPI2019) %>%
  dplyr::mutate(yq = as.yearqtr(yq),
                ym = as.yearmon(ym))

###
###
### Get PUMS shapefile
###
###

PUMS.sf = tigris::pumas(state = NULL,
                        cb = TRUE, 
                        year = 2019) %>%
  dplyr::filter(as.integer(STATEFP10)<=56 & STATEFP10!='02' & STATEFP10!='02' & STATEFP10!='15') %>%
  st_make_valid() %>%
  dplyr::mutate(CDIV = case_when(
    STATEFP10 %in% c('09','23','25','33','44','50') ~ 'New England',
    STATEFP10 %in% c('34','36','42') ~ 'Middle Atlantic',
    STATEFP10 %in% c('17','18','26','39','55') ~ 'East North Central',
    STATEFP10 %in% c('01','21','28','47') ~ 'East South Central',
    STATEFP10 %in% c('56','49','30','16','08') ~ 'Mountain North',
    STATEFP10 %in% c('04','32','35') ~ 'Mountain South',
    STATEFP10 %in% c('06','41','53','15','02') ~ 'Pacific',
    STATEFP10 %in% c('10','11','12','13','24','37','45','51','54') ~ 'South Atlantic',
    STATEFP10 %in% c('19','20','27','29','31','38','46') ~ 'West North Central',
    STATEFP10 %in% c('05','22','40','48') ~ 'West South Central'))



      newUrban = !any(grepl(list.files(DATA.PROC), pattern = 'stUrban\\.rds')) 
      if(newUrban){  # Takes over 18 hours, so can use the stUrban included 
        stUrban = foreach(stx = iter(unique(PUMS.sf$STATEFP)), 
                          .inorder = F, .errorhandling = 'pass',
                          .packages = c('dplyr','data.table','zoo','tigris'), .noexport = c('USgas.censusz'),
                          .verbose=T) %dopar% {  
                            blocks(state = stx, year = 2010, class = 'sf', refresh = F) %>%
                              group_by(UATYP10) %>%
                              dplyr::summarize() %>%
                              dplyr::mutate(UATYP10 = ifelse(!is.na(UATYP10), UATYP10, 'R'),
                                            STATEFP = stx,
                                            state = fips_codes$state[match(stx, fips_codes$state_code)])
                          }  
        
        saveRDS(bind_rows(stUrban), file = file.path(DATA.PROC, 'stUrban.rds'))
        stUrban = bind_rows(stUrban)
      } else {
        stUrban = readRDS(file.path(DATA.PROC, 'stUrban.rds'))
      }


czmap = st_read('https://services7.arcgis.com/FGr1D95XCGALKXqM/arcgis/rest/services/Dissolve_Climate_Zones_DOE_BA/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson') %>%
  st_transform(crs = st_crs(stUrban)) %>% st_make_valid() %>%
  dplyr::mutate(IECC_Moisture_Regime = ifelse(IECC_Moisture_Regime=='N/A', as.character(NA), IECC_Moisture_Regime)) %>%
  st_intersection(stUrban %>% dplyr::select(State = state, UATYP10) %>%  st_make_valid())  %>%
  dplyr::mutate(IECC_Moisture_RegimeX = case_when(!is.na(IECC_Moisture_Regime) ~ IECC_Moisture_Regime,
                                                  State %in% c('WY','CO') ~ 'B',
                                                  TRUE ~ 'A')) %>%
  dplyr::mutate(IECC = paste0(IECC_Climate_Zone, IECC_Moisture_RegimeX))  %>%
  dplyr::mutate(IECCalt = ifelse((IECC=='NAA'|IECC=='NAB'), as.character(NA), IECC)) %>%
  dplyr::mutate(IECC = case_when(IECCalt %in% c('1A','2A','1A-2A') ~ '1A-2A',
                                 IECCalt %in% c('2B','3A') ~ IECCalt,
                                 IECCalt %in% c('3B','4B','3B-4B') ~ '3B-4B',
                                 IECCalt %in% c('3C','4A','4C','5A') ~ IECCalt,
                                 IECCalt %in% c('5B','5C','5B-5C') ~ '5B-5C',
                                 IECCalt %in% c('6A','6B','6A-6B') ~ '6A-6B',
                                 grepl(IECCalt, pattern = '^7|^8') ~ '7A-7B-7AK-8AK',
                                 T ~ 'WTF'))  # IECC is same aggs as RECS now.


      
PUMS.cz = PUMS.sf %>%
  dplyr::select(GEOID10, STATEFP10) %>%
  dplyr::mutate(state_code = STATEFP10) %>%
  dplyr::mutate(State = fips_codes$state[match(.$STATEFP10, fips_codes$state_code)]) %>% 
  st_centroid(of_largest_polygon=T) %>%
  st_join(czmap %>% dplyr::select(IECC, UATYP10), left = T, join = st_nearest_feature) %>%
  dplyr::select(GEOID10, UATYP10, IECC) %>%
  dplyr::mutate(IECC = ifelse(test = is.na(IECC), 
                              yes  = IECC[!is.na(IECC)][st_nearest_feature(x = PUMS.cz,
                                                                           y = PUMS.cz %>% dplyr::filter(!is.na(IECC)))],
                              no   = IECC)) %>%
  dplyr::select(GEOID10, IECC, UATYP10) %>%
  st_set_geometry(NULL)


PUMS.info = get_acs(geography = 'public use microdata area', variables = c(totalPop = 'B02001_001E', whitePop = 'B02001_002E',
                                                                           blackPop = 'B02001_003E', nativePop = 'B02001_004E',
                                                                           asianPop = 'B02001_005E',
                                                                           totalHhE =  'B25040_001E', utilityGas = 'B25040_002', propane ='B25040_003', electricity = 'B25040_004', fuelOil = 'B25040_005'), 
                    year = 2019, output = 'wide') %>%
  dplyr::filter(GEOID %in% PUMS.sf$GEOID10) %>%
  dplyr::select(-ends_with('M')) %>%
  dplyr::mutate(missing = 1-(utilityGasE+propaneE+electricityE+fuelOilE)/totalHhE,
                sng   = (utilityGasE/totalHhE),
                sprop = (propaneE/totalHhE),
                selec = (electricityE/totalHhE),
                sfo   = (fuelOilE/totalHhE),
                otherPop = totalPop - whitePop - blackPop - nativePop - asianPop,
                whitePop = whitePop/totalPop, blackPop = blackPop/totalPop, 
                nativePop = nativePop / totalPop, asianPop = asianPop/totalPop, 
                otherPop = otherPop/totalPop) %>%
  dplyr::select(GEOID10 = GEOID, PUMS_Hh = totalHhE,missing:sfo, contains('Pop') ) %>%
  left_join(PUMS.cz, by='GEOID10')



###
###
###
### Merge HDD/CDD and COP to PUMS taking household-weighted average
### 
### 

COPl = readRDS(file.path(DATA.PROC, 'WCOP_HDD_PRISM.rds'))
COP_rasterb = COPl[[1]] # raster brick
#--> from 1_Preprocess Data.R


HHraster = HHrasterall = raster(file.path(DATA.ARCHIVE, 'SEDAC_ushh10.asc')) # from SEDAC usgrid https://sedac.ciesin.columbia.edu/data/set/usgrid-summary-file1-2010/data-download
values(HHrasterall) = 0 # rasterToPoints only gives points for non-NA values. Strange!

# This takes a little while
HHpoints = rasterToPoints(HHrasterall, spatial = F) %>% as_tibble() %>% mutate(idx = 1:n()) %>%
  st_as_sf(coords = c('x','y'), crs = st_crs(HHraster)) %>%
  st_join(PUMS.sf %>% dplyr::select(ST = STATEFP10, CDIV, GEOID10) %>% st_transform(st_crs(HHraster)), left=T) %>%
  st_join(czmap %>% dplyr::select(UATYP10, IECC) %>% st_transform(st_crs(HHraster)), left = T)

HHpoints = HHpoints %>%
  dplyr::filter(!duplicated(idx))

HHraster.index = cellFromXY(object = COP_rasterb, xy = st_coordinates(HHpoints))
### TO HERE
HHraster.COP = as.data.table(raster::extract(COP_rasterb, y = HHraster.index))
HHraster.COP[,c('HOUSEHOLDS','ST','GEOID10','UATYP10','IECC','CDIV'):=list(values(HHraster),
                                                                           HHpoints$ST,
                                                                           HHpoints$GEOID10,
                                                                           HHpoints$UATYP10,
                                                                           HHpoints$IECC,
                                                                           HHpoints$CDIV)]

HHraster.COP.PUMA = HHraster.COP[!is.na(GEOID10) & !is.na(HDD65) & HOUSEHOLDS>0,j=list(HDD65           = stats::weighted.mean(365*HDD65/COPl[['counter']], w = HOUSEHOLDS, na.rm=T),
                                                                                       AWCOP65.TOP     = 1/stats::weighted.mean(1/AWCOP65.TOP, w = HOUSEHOLDS, na.rm=T),
                                                                                       AWCOP65.PROTO.DOE = 1/stats::weighted.mean(1/AWCOP65.PROTO.DOE, w = HOUSEHOLDS, na.rm=T)), by = c('GEOID10')]

HHraster.COP.ST = HHraster.COP[!is.na(GEOID10) & !is.na(HDD65) & HOUSEHOLDS>0,j=list(HDD65 = stats::weighted.mean(365*HDD65/COPl[['counter']], w = HOUSEHOLDS),
                                                                                     AWCOP65.TOP = 1/stats::weighted.mean(1/AWCOP65.TOP, w = HOUSEHOLDS),
                                                                                     AWCOP65.PROTO.DOE = 1/stats::weighted.mean(1/AWCOP65.PROTO.DOE, w = HOUSEHOLDS)), by = c('ST')]

write_csv(HHraster.COP.ST, file.path(DATA.PROC, paste0('State_COP_and_HDD.csv')))



#### From Process USRDB Database Rates v4_US.R ####
ratedb = readRDS(file.path(DATA.PROC, 'Spatial_Rate_Database.rds'))    
  #-- NREL maps have better service territories
  #-- Match is done in Process USRDB Database v4 using datagov. Processing HARDI cleans up zips with missing eiaid

fzip.all = ratedb$fzip

if(UseGPSC==T) EIAf = ratedb$EIAf
if(UseGPSC==F) EIAf = ratedb$EIAfng  # EIAf - No Georgia (PSC prices)


############################
##### Zip-level merging ####
############################

#######
## Now, zip-to-PUMA


# Intersect all PUMA and Zips (with EIAID at zip level from datagov)

if(!any(grepl(list.files(DATA.PROC), pattern = 'PUMS_electricity\\.rds'))){  # this takes a while. Hours-ish
PUMS.electricity.int = st_intersection(PUMS.sf,
                                       fzip.all %>% dplyr::select(ZIP_CODE, eiaid, utility_name, CountResid = Count, POPULATION) %>% 
                                         st_transform(crs = st_crs(PUMS.sf)) %>% st_make_valid())


# Group by PUMA and EIAID, collapse across zipcodes. Sum zipcode populations into POPULATION
PUMS.electricity.int2 = PUMS.electricity.int %>% # st_set_geometry(NULL) %>%
  group_by(GEOID10, eiaid, utility_name, CountResid) %>%
  dplyr::summarize(ZIP_CODES = list(as.character(ZIP_CODE)),
                   POPULATION = sum(POPULATION, na.rm=T)) %>% # sum over zips within eiaid
  ungroup()


# Group by PUMA, collapse across EIAID by taking EIAID that represents largest summed POPULATION
PUMS.electricity = PUMS.electricity.int2 %>%
  dplyr::mutate(intArea = st_area(.),
                POPULATION.rates = ifelse(eiaid %in% EIAf, POPULATION, 0)) %>% st_set_geometry(NULL) %>% # lose geometry
  dplyr::arrange(GEOID10, POPULATION,intArea) %>%
  group_by(GEOID10) %>%
  dplyr::summarize(numUtilitiesInPUMA = length(unique(eiaid[as.numeric(intArea)>100000])), # some have border inconsistencies that have 100m2 overlap or so
                   eiaid = eiaid[which.max(POPULATION.rates)[1]],
                   utility_name = utility_name[which.max(POPULATION.rates)[1]],
                   fracAreaNoUtility = sum(intArea[is.na(eiaid)])/sum(intArea))

saveRDS(PUMS.electricity, file.path(DATA.PROC, 'PUMS_electricity.rds'))
} else {
  PUMS.electricity = readRDS(file.path(DATA.PROC, 'PUMS_electricity.rds'))
}

# Merge on GEOID10 to PUMS.sf later




#--> Get all gas service territories:
USgas = geojsonsf::geojson_sf(geojson = 'https://services1.arcgis.com/Hp6G80Pky0om7QvQ/arcgis/rest/services/Natural_Gas_Service_Territories/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson') %>%
  dplyr::mutate(COMPID = case_when(NAME=='LOBELVILLE, CITY OF' ~ '17608530',
                                   NAME=='CLARKE MOBILE COUNTIES GAS DISTRICT' ~ '17690211',
                                   NAME=='BLACK HILLS ENERGY' & LDC_STATE=='AR' ~ '17671155',
                                   NAME=='BROOKLYN MUNICIPAL UTILITIES' ~ '17601565',
                                   NAME=='AMEREN ILLINOIS' ~ '17671862',
                                   NAME=='PIEDMONT NATURAL GAS' & LDC_STATE=='NC' ~ '17699051',
                                   NAME=='NEW MEXICO GAS COMPANY' ~ '17673850',
                                   NAME=='PEOPLES NATURAL GAS CO.' ~ '17696419',
                                   NAME=='WASHINGTON GAS' & LDC_STATE=='VA' ~'17671254',
                                   NAME=='ATMOS ENERGY - LOUSIANA DIVISION' & LDC_STATE=='LA' ~ '17672060',
                                   NAME=='NORTHERN UTILITIES INC. (UNITIL)' & LDC_STATE=='NH' ~ '17675803', # 2 utilites combined; got new number
                                   NAME=='LIBERTY UTILITIES (ENERGYNORTH)' & LDC_STATE=='NH' ~ '17675803', # 2 utilites combined; got new number
                                   NAME=='ATHENS GAS DEPT, CITY OF' & LDC_STATE=='TN' ~ '17600619TN',
                                   NAME=='MGTC INC' & LDC_STATE=='WY' ~ '17602261', # bought out by black hills
                                   NAME=='BLACK HILLS GAS DISTRIBUTION LLC' & LDC_STATE=='WY' ~ '17602261', # is black hills (private now pub)
                                   NAME=='ENERGY WEST WYOMING' & LDC_STATE=='WY' ~ '17602261', # also bought out by black hills
                                   NAME=='AURORA, CITY OF' & LDC_STATE=='KS' ~ '17606727', # state map shows now covered by Kansas Gas
                                   NAME=='BUCKLEY, CITY OF' & LDC_STATE=='WA' ~ '17614608', # now covered by PSE
                                   T ~ COMPID)) %>%   # this is wrong on the map, doesn't match EIA176
  dplyr::mutate(Company =  paste0(COMPID, LDC_STATE))  # LDC_STATE is actual state, STATE is the loc of the company (lots of LA in TX)


EIA176 = EIA176raw = fread(here('Data','Found Data','EIA Natural Gas','EIA176_06082023.csv'))
colnames(EIA176) = str_replace_all(names(EIA176), pattern='<BR>', '_')
EIA176 = EIA176[,.(Year, State, Company176 = Company, name176 = `Company Name`, 
                   isIOU176 = `Investor_Owned`, isMuni176 = `Municipally_Owned`, isPrivate176 = Privately_Owned, 
                   isCoop176 = Cooperative, isOther176 = `Other_Ownership`,
                   rSales =`Residential_Sales_Volume_(Mcf)`, rPrice =`Residential_Sales_Price_(Dollars)`,
                   rRev =as.numeric(`Residential_Sales_Revenue_(Dollars)`), rCustomers = `Residential_Sales_Customers`)][,ownership176:=case_when(isCoop176=='X' ~ 'Coop',
                                                                                                                                      isPrivate176=='X' ~ 'Private',
                                                                                                                                      isIOU176=='X' ~ 'IOU',
                                                                                                                                      isMuni176=='X' ~ 'Muni',
                                                                                                                                      isOther176=='X' ~ 'Other',
                                                                                                                                      T ~ 'wtf')] %>%
  dplyr::filter(Year>=2014 & Year <= 2020) %>%
  left_join(CPI %>% dplyr::mutate(Yr = lubridate::year(ym)) %>% group_by(Yr) %>% dplyr::summarize(CPI2019 = mean(CPI2019)),
            by = c('Year' = 'Yr')) %>%
  dplyr::mutate(rRev = rRev*CPI2019,
                price176 = rPrice*CPI2019,
                adjustment176 = grepl(name176, pattern='ADJUSTMENT') | substr(Company176, 3,6)=='6999') %>%
  dplyr::select(-CPI2019) %>%
  group_by(Company176) %>%
  dplyr::mutate(ownership176 = dMode(ownership176),  # was mutate, but need 1 record for each company
                avgPrice = mean(price176, na.rm=T),  # Not the price used -- this is the average price over the whole 5 year window; for merging quality
                pctValid = sum(!is.na(rSales))/n()) %>%
  ungroup() %>%
  dplyr::filter(pctValid>.5) %>%
  dplyr::select(Company176, Year, name176, State, ownership176, adjustment176, rSales, rRev, rCustomers, price176, avgPrice, pctValid) %>%
  # dplyr::select(Company176,avgPrice, pctValid) %>%
  dplyr::arrange(State, Company176, Year) %>%
  # dplyr::arrange(Company176) %>%
  setDT()
### THESE ARE DEFLATED TO JAN 2019
### This is NOT used for gas prices. Just for merging on "valid" gas territories.
USgas = USgas %>% left_join(unique(EIA176[,.(Company176, avgPrice, pctValid)]), by = c('Company' = 'Company176')) # added the unique to get company (non-time-varying)info for merging







#--> Join each zip code in fzip to a gas service Company (using "largest" overlap) (but USgas has distributors that have 0 price b/c they have transmission prices)
  fzip.all = st_join(st_make_valid(fzip.all), 
                     USgas %>% 
                       dplyr::filter(!is.na(avgPrice) & avgPrice>0 & pctValid>.50) %>% 
                       dplyr::select(Company, SVCTERID, avgPrice176 = avgPrice) %>% 
                       st_make_valid() %>% st_simplify(dTolerance = .0001), # 27.5 feet or so
                     largest = TRUE,
                     left = TRUE)
          
          xx = fzip.all %>% dplyr::filter(is.na(SVCTERID))
          reslist = list()
          for(i in 1:NROW(xx)){
            idxtemp = st_nearest_feature(xx[i,], fzip.all %>%dplyr::filter(STATE==xx$STATE[i] & !is.na(Company)) %>% st_transform(st_crs(xx)))
            reslist[[i]] = fzip.all %>% st_set_geometry(NULL) %>% dplyr::filter(STATE==xx$STATE[i] & !is.na(Company)) %>% slice(idxtemp) %>% pull(Company)}
          
          xx = xx %>%
            st_set_geometry(NULL) %>% 
            dplyr::select(ZIP_CODE) %>%
            dplyr::mutate(nearestCompany = unlist(reslist))
          
          fzip.all = fzip.all %>%
            dplyr::mutate(noZipgas = is.na(Company)) %>%
            left_join(xx, by = 'ZIP_CODE') %>%
            dplyr::mutate(Company = ifelse(!is.na(Company), Company, nearestCompany))
          rm(xx)
#--> Now have zip-by-zip gas and electctric utility (but some eiaid might not have rates in EIA176)



PUMS.gas = st_join(PUMS.sf, 
                   USgas %>% 
                     dplyr::filter(!is.na(avgPrice) & avgPrice>0 & pctValid>.50) %>%  # need valid prices, drop trans. co's
                     dplyr::select(Company, SVCTERID, avgPrice176 = avgPrice)%>% 
                     st_transform(st_crs(PUMS.sf)) %>% st_make_valid(),
                   left = TRUE,
                   largest = TRUE)

        
        xx = PUMS.gas %>% dplyr::filter(is.na(SVCTERID))
        reslist = list()
        for(i in 1:NROW(xx)){
          idxtemp = st_nearest_feature(xx[i,], PUMS.gas %>%dplyr::filter(STATEFP10==xx$STATEFP[i] & !is.na(Company)) %>% st_transform(st_crs(xx)))
          reslist[[i]] = PUMS.gas %>% st_set_geometry(NULL) %>% dplyr::filter(STATEFP10==xx$STATEFP[i] & !is.na(Company)) %>% slice(idxtemp) %>% pull(Company)
          }
        
        xx = xx %>%
          st_set_geometry(NULL) %>% 
          dplyr::select(GEOID10) %>%
          dplyr::mutate(nearestCompany = unlist(reslist))
      
      PUMS.gas = PUMS.gas %>%
        dplyr::mutate(noPUMSgas = is.na(Company)) %>%
        left_join(xx, by = 'GEOID10') %>%
        dplyr::mutate(Company = ifelse(!is.na(Company), Company, nearestCompany)) 
      rm(xx)
      
      PUMS.gas = PUMS.gas %>%
        left_join(USgas %>% st_set_geometry(NULL) %>% dplyr::select(Company, NAME) %>% 
                    group_by(Company) %>% dplyr::summarize(gutility_name = paste(NAME, collapse = ' ---')) %>%
                    ungroup(),
                  by = 'Company')
      
      


## Links PUMS to Electricity and Gas companies (at PUMS level; can do both at Zip level still - see PUMS.electricity.int2 for zips)
PUMS.util = PUMS.sf %>%
  dplyr::select(-ALAND10, -AWATER10, -LSAD10) %>%
  left_join(PUMS.electricity %>% dplyr::select(GEOID10, eiaid, eutility_name = utility_name),by = 'GEOID10') %>%
  left_join(PUMS.gas %>% st_set_geometry(NULL) %>% dplyr::select(GEOID10, Company, gutility_name, noPUMSgas), by = 'GEOID10') 
#---> And PUMA to PUMA gas and electric utility




###
###
### All PUMA have EIAIDs, but not all PUMAs have gas companies
### While there *shouldn't* be gas service in some areas (FL, etc.), there are reported gas customers in nearly all PUMAs.
### So get nearest
stopifnot(sum(is.na(PUMS.util$eiaid))==0) # shouldn't be anyone without electric service
stopifnot(sum(is.na(PUMS.util$Company))==0) # and shouldn't be anyone without gas (but noPUMSgas means they didn't direclty match)
      



##########
##### Import Prices for nat'l gas, fuel oil, propane
##########

#######
## Natural Gas prices and propane prices
##
# https://www.eia.gov/dnav/pet/PET_PRI_WFR_A_EPLLPA_PRS_DPGAL_W.htm
# 
colnames.prop = readxl::read_xls(file.path(DATA.ARCHIVE,'EIA Prices','PET_PRI_WFR_A_EPLLPA_PRS_DPGAL_W--03242023.xls'), sheet = 'Data 1', skip = 1, n_max = 1, col_names = FALSE) %>% as.data.frame() %>% as.character()

refnames.prop = readxl::read_xls(file.path(DATA.ARCHIVE,'EIA Prices','PET_PRI_WFR_A_EPLLPA_PRS_DPGAL_W--03242023.xls'), sheet = 'Data 1', skip = 2, n_max = 1, col_names = FALSE) %>% as.data.frame() %>% as.character()

refnames.prop =   gsub('Weekly | Propane', '', str_extract(refnames.prop, 'Weekly ([A-Za-z].*) Propane'))
refnames.prop[1] = 'Date'

refcol = cbind(colnames.prop, refnames.prop)[-1,]  

prop = readxl::read_xls(file.path(DATA.ARCHIVE,'EIA Prices','PET_PRI_WFR_A_EPLLPA_PRS_DPGAL_W--03242023.xls'), sheet = 'Data 1', skip = 3, col_types = c('date',rep('numeric', 46)), col_names = colnames.prop) %>% 
  pivot_longer(2:NCOL(.)) %>%
  left_join(x = ., y= refcol, by=c('name' = 'colnames.prop'), copy=T) %>%
  dplyr::mutate(propane.year = ifelse(month(Sourcekey)<5, year(Sourcekey)-1, year(Sourcekey))) %>%
  dplyr::mutate(year = year(Sourcekey),
                ym = as.yearmon(Sourcekey),
                yq = as.yearqtr(Sourcekey),
                STATE.prop = substr(name, 15, 16)) %>%
  left_join(CPI %>% dplyr::select(ym, CPI2019), by = 'ym') %>%
  dplyr::mutate(value = value*CPI2019) %>%
  dplyr::filter(propane.year>=2010 & !is.na(CPI2019)) %>%
  dplyr::select(-CPI2019) %>%
  dplyr::mutate(residualized =value - predict(lm(value ~ as.factor(refnames.prop) + as.factor(Sourcekey), data = .), newdata = .))%>%
  dplyr::arrange(name, Sourcekey) %>%
  group_by(refnames.prop, name, STATE.prop,  yq) %>%
  dplyr::summarize(prop.price = mean(value),
                   prop.sd = sd(value)) %>%
  group_by(refnames.prop, name, STATE.prop) %>%
  tidyr::complete(yq = seq(min(yq), max(yq), by = .25)) %>%
  fill(prop.price, prop.sd, .direction = 'down') %>%
  ungroup()  %>%
  arrange(STATE.prop, yq) 
### THESE ARE DEFLATED TO JAN 2019
### prop starts with STATE/PADD x WEEK, I deflate and bring to STATE/PADD x yq


fo = readxl::read_xls(file.path(DATA.ARCHIVE,'EIA Prices','PET_PRI_WFR_A_EPD2F_PRS_DPGAL_W--03242023.xls'), sheet = 'Data 1', skip = 1, col_types = c('date',rep('numeric', 29))) %>% # dollars per GALLON
  dplyr::filter(Sourcekey>as.Date('2010-01-01')) %>%
  pivot_longer(-Sourcekey) %>%
  dplyr::mutate(STATE = substr(name, 14, 15),
                ym = as.yearmon(Sourcekey),
                yq = as.yearqtr(Sourcekey)) %>%
  # dplyr::filter(STATE!='US') %>%
  dplyr::rename(FOPrice = 'value') %>%
  dplyr::select(-name) %>%
  left_join(CPI %>% dplyr::select(ym, CPI2019), by='ym') %>%
  dplyr::mutate(FOPrice = FOPrice*CPI2019) %>%
  dplyr::filter(!is.na(FOPrice)) %>%
  group_by(yq, STATE) %>%
  dplyr::summarize(FOPrice = mean(FOPrice)) %>%
  dplyr::arrange(STATE, yq) %>%
  dplyr::filter(STATE !='AK' & STATE !='HI') %>%
  group_by(STATE) %>%
  tidyr::complete(yq = seq(min(yq), max(yq), by = .25)) %>%
  fill(FOPrice, .direction = 'down') %>%
  dplyr::mutate(qtr = quarter(yq)) %>%
  ungroup() %>%
  setDT()
### THESE ARE DEFLATED TO JAN 2019





# --> EIA176 datasource for nat'l gas is yearly, but at company x state level (for most but not all companies). Adjustment Companies here have no data, even in EIA176raw
# 
# --> ng176data is taking straight revenue/sales for yearly (deflated) mean, then taking 5-year average
ngeia176dataraw = EIA176 %>% 
  dplyr::select(yr = Year, Company176, name176, ownership176, NGPriceraw = price176) %>%  # price176 is calc. from rev/sales, then deflated.
  # bind_rows(ngdety) %>%
  dplyr::filter(yr>=2015 & yr<=2019) %>%
  group_by(Company176) %>%
  dplyr::summarize(NGPrice176raw = mean(NGPriceraw))     #--------> Average yearly-by-utility average prices (REV/VOL) to utility.

# 
# --> ng176datareg is regressing by-company for fixed (so it matches electricity prices), but this is pretty sketchy....
jSpitOutAVC <- function(dd){
  dlm = tidy(lm(rRev ~ 0 + rCustomers + rSales, data = dd))
  tibble(fixed176 = dlm$estimate[1],
         NGPrice176reg = dlm$estimate[2])
}

ngeia176datareg = EIA176 %>% dplyr::filter(avgPrice>0 & Year>=2015 & Year<=2019) %>%
  group_by(Company176) %>%
  do(jSpitOutAVC(.))

ngeia176data = ngeia176dataraw %>%
  left_join(ngeia176datareg, by = 'Company176')







EIAf = EIAf %>% 
  left_join(CPI %>% dplyr::select(ym, CPI2019), by='ym') %>%
  dplyr::mutate(yr = year(ym),
                cpkwh = cpkwh*CPI2019,
                fixed_cost = fixed_cost*CPI2019,
                Revenue = Revenue*CPI2019,
                seasonWeight = as.integer(Month<=4 | Month >=8)) # using April as prop months (oct-mar) reflect storage of propane


elec = EIAf %>%
  dplyr::filter(Year>=2015 & Year<=2019) %>%
  group_by(eiaid, State) %>%
  dplyr::summarize(cpkwhreg = weighted.mean(cpkwh,na.rm=T,  w = seasonWeight),
                   cpkwhraw = sum(Revenue[seasonWeight==1])/sum(MWH[seasonWeight==1])) %>%  #----> Average quarterly slope coefs. to 2015-2019
  # dplyr::mutate(cpkwh.mbtu = cpkwh*el_to_mbtu) %>%
  dplyr::filter(is.finite(cpkwhreg)) %>%
  ungroup()
### THESE ARE DEFLATED TO JAN 2019





###
### EIA176 has natural gas prices at the yearly level by gas utility
###      (merged as Company on fzip.allgas) (with state level 000ST for those who do not report, the adjustments in EIA176 are empty): ngeia176data (raw for raw calc, reg for regression-bsaed)
### ng has natural gas prices at the state level by quarter (from weekly reports) (deseasoned)
### fo has fuel oil at the state/PADD level by quarter
### prop has propane at the state/PADD level by quarter
### EIAf has electricity cost quarterly fixed and marginal electricity costs at the electric utility (EIAID) level with state for remainder (not all report). 
###     - from Regression-based (23 month window local linear regression by quarter), and uses GA utility survey data (replaced in-line in ratedb$EIAf)
### Soren uses EIAf source, but at the state level with average calculated
### 







###
### Merge USprices for state-level gas and propane and fuel oil
### and merge elec for eiaid-level electricity
### and EIA176 gas prices (by company instead of by state)
### and cz
### 


PUMS.utilprices.st = PUMS.util %>%
  st_set_geometry(NULL) %>%
  dplyr::mutate(state_code = STATEFP10) %>%
  left_join(fips_codes %>% dplyr::select(STATEFP10 = state_code, STATE = state) %>% distinct(), by = 'STATEFP10') %>%
  dplyr::select(STATE, GEOID10) %>%
  unique() %>%
  dplyr::mutate(STATE.prop = case_when(  #----------------> State or PADD or nearest PADD for propane
    STATE %in% prop$STATE.prop ~ STATE,
    STATE %in% c('ME','NH','VT','MA','CT','RI') ~ '1X',
    STATE %in% c('NY','PA','NJ','DE','MD','DC') ~ '1Y',
    STATE %in% c('WV','VA','NC','SC','GA','FL') ~ '1Z',
    STATE %in% c('ND','SD','NE','KS','OK','MN','IA','MO',
                 'TN','KY','IN','OH','IL','WI','MI') ~ '20',
    STATE %in% c('NM','TX','AR','LA','MS','AL') ~ '30',
    STATE %in% c('ID','MT','WY','UT','CO') ~ '40',
    STATE %in% c('WA','OR','CA','NV','AZ') ~ '40')) %>%
  left_join(prop %>% dplyr::select(yq, STATE.prop, prop.price) %>%
              group_by(STATE.prop) %>%
              dplyr::filter(year(yq)>=2015 & year(yq)<=2019) %>%
              dplyr::summarize(prop.pricest = mean(prop.price, na.rm=T)),
            by = c('STATE.prop')) %>%
  dplyr::mutate(STATE.fo = case_when(    #----------------> State or PADD or US for fuel oil
    STATE %in% fo$STATE ~ STATE,
    STATE %in% c('ME','NH','VT','MA','CT','RI') ~ '1X',
    STATE %in% c('NY','PA','NJ','DE','MD','DC') ~ '1Y',
    STATE %in% c('WV','VA','NC','SC','GA','FL') ~ '1Z',
    STATE %in% c('ND','SD','NE','KS','OK','MN','IA','MO',
                 'TN','KY','IN','OH','IL','WI','MI') ~ '20',
    STATE %in% c('NM','TX','AR','LA','MS','AL') ~ 'US',
    STATE %in% c('ID','MT','WY','UT','CO') ~ 'US',
    STATE %in% c('WA','OR','CA','NV','AZ') ~ 'US')) %>%
  left_join(fo %>% dplyr::select(yq, STATE, STATE.fo = STATE, FOPrice) %>%
              group_by(STATE.fo) %>%
              dplyr::filter(year(yq)>=2015 & year(yq)<=2019) %>%
              dplyr::summarize(FOPricest = mean(FOPrice, na.rm=T)),
            by = c('STATE.fo')) %>%
  dplyr::select(GEOID10, prop.pricest, FOPricest)




##### Here be where we choose the NG price
##### Used "reg" in first submission, used "raw" in RR1 2-2024

usingMethod = 'raw'  # 'reg'

usingNG = 'NGPrice176raw'
usingEC = 'cpkwhraw'


PUMS.utilprices.gas = PUMS.util %>% 
  st_set_geometry(NULL) %>%
  left_join(ngeia176dataraw %>% dplyr::select(Company176, NGPrice176 = !!usingNG), by = c('Company' = 'Company176'))
 


PUMS.utilprices.elec = PUMS.util %>% 
  dplyr::select(-gutility_name, -noPUMSgas, Company) %>%
  st_set_geometry(NULL) %>% ungroup() %>%
  dplyr::mutate(State = fips_codes$state[match(.$STATEFP10, fips_codes$state_code)]) %>% 
  left_join(elec %>% dplyr::filter(eiaid!=0) %>% rename(cpkwh.matched = !!usingEC) %>% dplyr::select(State, eiaid, cpkwh.matched), by=c('State','eiaid')) %>%
  dplyr::mutate(noPUMSelec = is.na(cpkwh.matched)|!is.finite(cpkwh.matched)) %>%
  left_join(elec %>% dplyr::filter(eiaid==0) %>% rename(cpkwh.state = !!usingEC) %>% dplyr::select(State, cpkwh.state), by = c('State')) %>%
  dplyr::mutate(cpkwh = ifelse(noPUMSelec==T, cpkwh.state, cpkwh.matched))



PUMS.utilprices =  PUMS.util %>%
  left_join(PUMS.utilprices.gas %>%  dplyr::select(GEOID10, NGPrice176),              by = 'GEOID10') %>%
  left_join(PUMS.utilprices.elec %>% dplyr::select(GEOID10, cpkwh,noPUMSelec),        by = 'GEOID10') %>%
  left_join(PUMS.utilprices.st %>%   dplyr::select(GEOID10, prop.pricest, FOPricest), by = 'GEOID10') %>%
  dplyr::select(GEOID10,
                eiaid, eutility_name, noPUMSelec, cpkwh, 
                Company, gutility_name, noPUMSgas, NGPrice176, prop.pricest, fo.pricest = FOPricest) %>%
  st_set_geometry(NULL)




  

  
  


        ## Convert to "per vent-gate MBTU" prices
        # https://www.eia.gov/energyexplained/units-and-calculators/
        # These are price-to-ventgate conversions
        prop.conv = .85    ## Was .85: AFUE for top of medium-efficiency is 85: https://www.advancedpropaneinc.com/2020/02/17/efficiency-of-propane-furnaces/#:~:text=Medium%20efficiency%3A%20AFUE%20ratings%20are,are%20between%2090%20and%2098.5.
        ng.conv   = .93   ## AFUE for top of mid-efficiency is 93: https://jacobsheating.com/blog/afue-rating/#:~:text=The%20minimum%20AFUE%20rating%20is,from%2094%20and%2098.5%20percent.
        fo.conv   = .90   # Was .90   See: https://en.wikipedia.org/wiki/Annual_fuel_utilization_efficiency#cite_note-4 (says .89)

        
        
        prop_to_mbtu =  (1/91.5)*(1/prop.conv)
        el_to_mbtu   =  (1/3.412)     # Heat content of electricity
        ng_to_mbtu   =  1000/(1037*1000)*(1/ng.conv)   # https://www.eia.gov/totalenergy/data/monthly/pdf/sec12_5.pdf  1.037 is BTU:ft^3
        fo_to_mbtu   =  (1/.137381)/1000*(1/fo.conv)


PUMS.all = PUMS.sf %>%
  dplyr::mutate(State = fips_codes$state[match(STATEFP10, fips_codes$state_code)]) %>%
  dplyr::select(GEOID10, STATEFP10, State, NAME10) %>%
  left_join(PUMS.utilprices, by='GEOID10') %>%
  left_join(PUMS.info, by='GEOID10') %>%
  left_join(HHraster.COP.PUMA, by = 'GEOID10') %>%  # was PUMS.COP, which is no longer made
  dplyr::mutate(cpkwh.mbtu      = cpkwh*el_to_mbtu,
                NGPrice176.mbtu = NGPrice176*ng_to_mbtu,
                propst.mbtu     = prop.pricest*prop_to_mbtu,
                fost.mbtu       = fo.pricest*fo_to_mbtu,
                URBAN = ifelse(UATYP10 %in% c('C','U'), 'U','R'),
                elec_prem = cpkwh.mbtu -NGPrice176.mbtu ) %>%
  dplyr::select(-UATYP10)

   




###
###
### PUMS Data
### 
### 


PUMA.raw = get_pums(state = 'all',
                    variables = c('PUMA','HINCP','RMSP', 'BDSP', 'YBL','HFL','RAC1P'),  
                    survey = 'acs5', year = 2019,
                    variables_filter = list(SPORDER = 1),  # housing variables only (takes first respondent)
                    rep_weights = FALSE, # don't get series of replicate weights
                    recode = TRUE)

PUMA.proc = PUMA.raw %>%
  dplyr::mutate(GEOID10 = paste0(ST, PUMA)) %>%
  dplyr::select(SERIALNO, ST, ST_label, WGTP, GEOID10, HINCP, RMSP, BDSP, YBL, YBL_label, HFL, HFL_label, RAC1P)  %>%
  dplyr::filter(HINCP!=-60000 & RMSP!=0 & BDSP != -1 & !grepl(GEOID10, pattern = '^15|^02')) %>%  # These are NA's according to documentation, and drop AK and HI
  dplyr::arrange(HINCP) %>%
  dplyr::mutate(HINC = factor(case_when(HINCP < 10000 ~ 'Less than 10k',  # this is Section 3 details
                                        HINCP < 15000 ~ '10-15k',
                                        HINCP < 25000 ~ '15-25k',
                                        HINCP < 35000 ~ '25-35k',
                                        HINCP < 50000 ~ '35-50k',
                                        HINCP < 75000 ~ '50-75k',
                                        HINCP <100000 ~ '75-100k',
                                        HINCP <150000 ~ '100-150k',
                                        HINCP <200000 ~ '150-200k',
                                        HINCP>=200000 ~ '200k+'),
                              levels = c('Less than 10k','10-15k','15-25k','25-35k','35-50k','50-75k',
                                         '75-100k','100-150k','150-200k','200k+'))) %>%
  dplyr::mutate(MONEYPY20 = as.integer(cut(HINCP,   # match EIA RECS 2020
                              breaks = c(-Inf, 5000, 7499, 9999, 12499, 14999, 19999, 24999, 29999, 34999, 39999, 49999, 59999, 74999, 99999, 149999, Inf),
                              labels = c(1:16))),
                MONEYPY   = as.integer(cut(HINCP,   # match EIA RECS 2020
                                         breaks = c(-Inf, 20000, 39999, 59999, 79999, 99999, 119999, 139999, Inf),
                                         labels = c(1:8))),
                BEDROOMS = case_when(BDSP<=6 ~ BDSP,  # match EIA RECS 2015/2020 BEDROOMS
                                     BDSP>6 ~ 6),
                TOTROOMS = case_when(RMSP<1 ~ 1,  # match EIA RECS 2015/2020 TOTROOMS
                                     RMSP>=1 & RMSP<=15 ~ RMSP,
                                     RMSP>15 ~ 15),
                FUELHEAT = case_when(HFL == 0 ~ -2,
                                     HFL == 1 ~  1,
                                     HFL == 2 ~  2,
                                     HFL == 3 ~  5,
                                     HFL == 4 ~  3,
                                     HFL == 5 ~  99,  # 21 in 2015
                                     HFL == 6 ~  7,   # 6 in PUMS is wood; 7 in RECS20 is wood. WILL BE DROPPED
                                     HFL == 7 ~  5,   # PUMA 7 is solar -- goes to regular old electricity
                                     HFL == 8 ~  99,  # 21 in 205
                                     HFL == 9 ~  -2),
                FUELHEAT15=case_when(HFL == 0 ~ -2,
                                     HFL == 1 ~  1,
                                     HFL == 2 ~  2,
                                     HFL == 3 ~  5,
                                     HFL == 4 ~  3,
                                     HFL == 5 ~  21,  # 21 in 2015
                                     HFL == 6 ~  7,  
                                     HFL == 7 ~  5, # PUMA 7 is solar -- goes to regular old electricity
                                     HFL == 8 ~  21,  # 21 in 205
                                     HFL == 9 ~  -2)) %>%
  dplyr::mutate(YEARMADERANGEf = factor(case_when(YBL_label %in% c('1939 or earlier','1940 to 1949') ~ 'Before 1950',
                                                 YBL_label %in% c('2000 to 2004','2005','2006','2007','2008','2009') ~ '2000 to 2009',
                                                 YBL_label %in% as.character(2010:2020) ~ '2010 to 2020',
                                                 TRUE ~ YBL_label),
                                       levels = c('Before 1950','1950 to 1959','1960 to 1969','1970 to 1979',
                                                  '1980 to 1989','1990 to 1999','2000 to 2009','2010 to 2020')),
                YEARMADERANGE = as.character(YEARMADERANGEf),
                RACE = case_when(RAC1P == '1' ~ 'White',
                                 RAC1P == '2' ~ 'Black',
                                 RAC1P %in% c('3','4','5') ~ 'Native Am.',
                                 RAC1P %in% c('6') ~ 'Asian',
                                 RAC1P %in% c('7','8','9') ~ 'Other')) %>%  # pacific islander to other per Soren setup
  left_join(PUMS.all %>% st_set_geometry(NULL) %>% dplyr::select(GEOID10, IECC, HDD65), by = 'GEOID10') %>%
  dplyr::filter(FUELHEAT != -2 & FUELHEAT !=99 & FUELHEAT !=21 & FUELHEAT !=7)




###
###
### Get REC'd
### 
### 


  RECS15 = fread(file.path(DATA.ARCHIVE, 'RECS','recs2015_public_v4.csv'))[,IECC:=IECC_CLIMATE_PUB][,RECS:=2015]
  RECS20 = fread(file.path(DATA.ARCHIVE, 'RECS','recs2020_public_v4.csv'))[,IECC:=IECC_climate_code][,RECS:=2020]

RECS = rbindlist(list(RECS20, RECS15), fill=T)[,.(DOEID, NWEIGHT = NWEIGHT/100, RECS, STATE_FIPS, state_postal, IECCalt = IECC, HDD65, 
                                                  YEARMADERANGEraw = YEARMADERANGE, MONEYPYraw = MONEYPY,
                                                  BEDROOMS, TOTROOMS,
                                                  FUELHEAT, EQUIPM, TOTALBTUSPH)][FUELHEAT != -2 & FUELHEAT !=99 & FUELHEAT !=21 & FUELHEAT !=7,]

## remap RECS to PUMS
RECS[,FUELHEAT:=ifelse(FUELHEAT==21, 99, FUELHEAT)]  # 2015 used 21 for Other, 2020 used 99; coded PUMS FUELHEAT to 2020's 99

RECS[,YEARMADERANGEraw:=ifelse(YEARMADERANGEraw>8, 8, YEARMADERANGEraw)]  # only 2020 has 9's and those are 2016-2020
RECS$YEARMADERANGE=levels(PUMA.proc$YEARMADERANGEf)[RECS$YEARMADERANGEraw]  # levels are in order

RECS[, MONEYPY:=as.integer(case_when(RECS==2015 ~ MONEYPYraw, # PUMA.proc is set to RECS2015 income categories (most coarse)
                                     MONEYPYraw <=6  ~ 1,  # up to 20000
                                     MONEYPYraw <=10 ~ 2,
                                     MONEYPYraw <=12 ~ 3,
                                     MONEYPYraw <=13 ~ 4, # up to 74k in 2020 OR 80k in 2015
                                     MONEYPYraw <=14 ~ 5,
                                     MONEYPYraw <=15 ~ 7,
                                     MONEYPYraw > 15 ~ 8))]


RECS[,c('BEDROOMS','TOTROOMS'):=list(
  ifelse(BEDROOMS>6, 6, BEDROOMS),   # 6 on RECS20, 30 on RECS15!
  ifelse(TOTROOMS>15, 15, TOTROOMS))]


RECS[,c('HPuser','OTHuser'):=list(EQUIPM==4 | EQUIPM==13,
                                EQUIPM %in% c(-2, 8, 9, 12, 21,99) | FUELHEAT %in% c(-2, 7, 8,9,21,99))] # for sample construction

RECS[,IECC:=case_when(IECCalt %in% c('1A','2A','1A-2A') ~ '1A-2A',
                      IECCalt %in% c('2B','3A') ~ IECCalt,
                      IECCalt %in% c('3B','4B','3B-4B') ~ '3B-4B',
                      IECCalt %in% c('3C','4A','4C','5A') ~ IECCalt,
                      IECCalt %in% c('5B','5C','5B-5C') ~ '5B-5C',
                      IECCalt %in% c('6A','6B','6A-6B') ~ '6A-6B',
                      grepl(IECCalt, pattern = '^7|^8') ~ '7A-7B-7AK-8AK',
                      T ~ 'WTF')][,IECCalt:=NULL]

RECS[,heatTech:=case_when(FUELHEAT==1 ~ 'ng',   # natural gas
                          FUELHEAT==2 ~ 'prop',   # propane)]
                          FUELHEAT==3|FUELHEAT==4 ~ 'fo',   # fo (fuel oil) (in 2009, 4 is Kerosene and 3 is FO, in 2009 3 is FO&Kerosene)
                          FUELHEAT==5 & EQUIPM %in% c(4,13) ~ 'Heat Pump',
                          FUELHEAT==5 ~ 'Elec. Resistance',
                          FUELHEAT %in% c(7,8,9,21,-2, 99) ~ as.character(NA),  # Wood, Solar, District Steam // in 2015 only 7 and 9
                          TRUE ~ as.character(NA))]

RECS[,effFactor:=case_when(heatTech=='ng'   ~ ng.conv,   # natural gas
                           heatTech=='prop' ~ prop.conv,   # propane)]
                           heatTech=='fo'   ~ fo.conv,   # fo (fuel oil) and Kerosene
                           heatTech=='Heat Pump' ~ as.numeric(NA),  # use NA here so it isn't part of estimation # USE 1 HERE -- ESTIMATE RELATIONSHIP (o/w it is dropped)
                           heatTech=='Elec. Resistance' ~ 1,
                           is.na(heatTech) ~ as.numeric(NA),
                           TRUE ~ as.numeric(NA))]

RECS[,c('heatTech','TOTALBTUSPHuse'):=list(
  fct_relevel(heatTech, 'Elec. Resistance'),
  TOTALBTUSPH*effFactor)]  # This is "vent-gate" MBTU consumption


## Set aside RECS shares by state of electric resistance, electric HP
RECS.heatshares1 = RECS[FUELHEAT==5,j=list(shp = weighted.mean(EQUIPM %in% c(4,13), w = NWEIGHT),
                                          selr = weighted.mean(!EQUIPM %in% c(4,13), w = NWEIGHT),
                                          N = .N), by = c('IECC','MONEYPY')][order(IECC, MONEYPY, N),]
RECS.heatshares2 = RECS[FUELHEAT==5,j=list(shp = weighted.mean(EQUIPM %in% c(4,13), w = NWEIGHT),
                                           selr = weighted.mean(!EQUIPM %in% c(4,13), w = NWEIGHT),
                                           N = .N), by = c('IECC')][order(IECC, N),]

        
        ## some checks for compatibility between PUMA and RECS variables ##
        jDiff<-function(dx, dy){
          list(setdiff(dx, dy),
               setdiff(dy, dx))
        
        }
        # should all be 0 length
        jDiff(RECS$MONEYPY, PUMA.proc$MONEYPY)
        jDiff(RECS$IECC, PUMA.proc$IECC)
        jDiff(RECS$YEARMADERANGE, PUMA.proc$YEARMADERANGE)
        jDiff(RECS$BEDROOMS, PUMA.proc$BEDROOMS)
        jDiff(RECS$TOTROOMS, PUMA.proc$TOTROOMS)
        jDiff(RECS$FUELHEAT, PUMA.proc$FUELHEAT)

        
        
RECS[,samp1:=HPuser==F & OTHuser==F & FUELHEAT!= -2 & TOTALBTUSPH>0 & HDD65>0 & !is.na(TOTALBTUSPHuse),]   #---> this one is used     
RECSs = RECS[samp1==T,]


pmod1 = fepois(TOTALBTUSPHuse ~ as.factor(IECC)*as.factor(FUELHEAT) + as.factor(YEARMADERANGE)*poly(HDD65, 3) + as.factor(MONEYPY) + as.factor(TOTROOMS) + as.factor(BEDROOMS),
             cluster = ~ IECC, data = RECSs, weights = ~NWEIGHT)

  

RECSs[,preg1:=predict(pmod1, newdata = .SD)]
RECSs[,preg1resid:=preg1 - TOTALBTUSPHuse]




PUMA.proc = PUMA.proc %>%
  dplyr::mutate(pred.mbtu = predict(pmod1, newdata = .),
                pred.mbtu.Standard = predict(pmod1, newdata = PUMA.proc %>%
                                                     dplyr::mutate(YEARMADERANGE = '1960 to 1969',
                                                                   MONEYPY = 2,
                                                                   TOTROOMS = as.integer(round(weighted.mean(RECS20[abs(TOTHSQFT-1500)<50, TOTROOMS], RECS20[abs(TOTHSQFT-1500)<50, NWEIGHT]))),
                                                                   BEDROOMS = as.integer(round(weighted.mean(RECS20[abs(TOTHSQFT-1500)<50, BEDROOMS], RECS20[abs(TOTHSQFT-1500)<50, NWEIGHT]))))))








###
### Predicted costs and Savings
###
###
rr = .07 # discount rate
ll = 20  # lifetime 


PUMA.analysis = PUMA.proc %>%
  left_join(PUMS.all %>% dplyr::select(GEOID10, URBAN, noPUMSgas, noPUMSelec, contains('mbtu'), contains('AWCOP')) %>% st_set_geometry(NULL),
            by = 'GEOID10') %>%
  left_join(RECS.heatshares1[,.(IECC, MONEYPY, shp, selr)], by = c('IECC','MONEYPY')) %>% 
  dplyr::mutate(WGTP = WGTP*ifelse(FUELHEAT==5, selr, 1)) %>%   # adjust weight for elec. fuel by selr (so if 0 weight on selr, then doesn't count for averages)
  dplyr::mutate(pred.cost.prop = pred.mbtu*propst.mbtu, # these are already "per vent-gate MBTU" prices. This is cost if prop, fo, ...
                pred.cost.fo   = pred.mbtu*fost.mbtu,
                pred.cost.ng176= pred.mbtu*NGPrice176.mbtu,
                pred.cost.elr  = pred.mbtu*cpkwh.mbtu,
                pred.cost.elhpACOP.TOP = pred.mbtu*cpkwh.mbtu/AWCOP65.TOP,
                # pred.cost.elhpACOP.PROTO = pred.mbtu*cpkwh.mbtu/AWCOP65.PROTO,
                pred.cost.elhpACOP.PROTO.DOE = pred.mbtu*cpkwh.mbtu/AWCOP65.PROTO.DOE) %>%
  dplyr::mutate(pred.cost.prop.Standard = pred.mbtu.Standard*propst.mbtu, # this is cost for standard (1500sqft 1965) if prop, fo, ...
                pred.cost.fo.Standard   = pred.mbtu.Standard*fost.mbtu,
                pred.cost.ng176.Standard= pred.mbtu.Standard*NGPrice176.mbtu,
                pred.cost.elr.Standard  = pred.mbtu.Standard*cpkwh.mbtu,
                pred.cost.elhpACOP.TOP.Standard = pred.mbtu.Standard*cpkwh.mbtu/AWCOP65.TOP,
                # pred.cost.elhpACOP.PROTO.Standard = pred.mbtu.Standard*cpkwh.mbtu/AWCOP65.PROTO,
                pred.cost.elhpACOP.PROTO.DOE.Standard = pred.mbtu.Standard*cpkwh.mbtu/AWCOP65.PROTO.DOE) %>%
  dplyr::mutate(save.prop = pred.cost.prop - pred.cost.elhpACOP.TOP,   # savings if moving from prop, fo, ...
                save.fo =   pred.cost.fo - pred.cost.elhpACOP.TOP,
                save.ng176= pred.cost.ng176 - pred.cost.elhpACOP.TOP,
                save.elhp.TOP =  pred.cost.elr - pred.cost.elhpACOP.TOP,
                # save.elhp.PROTO = pred.cost.elhpACOP.TOP - pred.cost.elhpACOP.PROTO,  
                save.elhp.PROTO.DOE = pred.cost.elhpACOP.TOP - pred.cost.elhpACOP.PROTO.DOE,
                save.own.TOP =     case_when(FUELHEAT==1 ~ save.ng176,    # savings for own heat tech ###################
                                             FUELHEAT==2 ~ save.prop,
                                             FUELHEAT==3 ~ save.fo,
                                             FUELHEAT==5 ~ save.elhp.TOP,
                                             T ~ NA),
                save.own.TOP.max = pmax(save.own.TOP, 0),  # use this
                save.own.TOP.max.pct = 100*pmax(save.own.TOP, 0)/pmax(0, HINCP),
                save.own.TOP.max.pctc = 100*pmax(save.own.TOP, 0)/pmax(5000, HINCP),  # this is used
                # save.own.PROTO=    case_when(FUELHEAT==1 ~ save.ng176 + save.elhp.PROTO,
                #                              FUELHEAT==2 ~ save.prop + save.elhp.PROTO,
                #                              FUELHEAT==3 ~ save.fo + save.elhp.PROTO,
                #                              FUELHEAT==5 ~ save.elhp.TOP + save.elhp.PROTO,
                #                              T ~ NA),
                save.own.PROTO.DOE=case_when(FUELHEAT==1 ~ save.ng176  + save.elhp.PROTO.DOE,
                                             FUELHEAT==2 ~ save.prop + save.elhp.PROTO.DOE,
                                             FUELHEAT==3 ~ save.fo + save.elhp.PROTO.DOE,
                                             FUELHEAT==5 ~ save.elhp.TOP + save.elhp.PROTO.DOE,
                                             T ~ NA)) %>%
  dplyr::mutate(save.prop.Standard = pred.cost.prop - pred.cost.elhpACOP.TOP, # savings if standard cons and prop, fo, ..
                save.fo.Standard =   pred.cost.fo - pred.cost.elhpACOP.TOP,
                # save.ng =   pred.cost.ng - pred.cost.elhpACOP.TOP,
                save.ng176.Standard= pred.cost.ng176 - pred.cost.elhpACOP.TOP,
                save.elhp.TOP.Standard =  pred.cost.elr - pred.cost.elhpACOP.TOP,
                # save.elhp.PROTO.Standard = pred.cost.elhpACOP.TOP - pred.cost.elhpACOP.PROTO,
                save.elhp.PROTO.DOE.Standard = pred.cost.elhpACOP.TOP - pred.cost.elhpACOP.PROTO.DOE,
                save.own.TOP.Standard =     case_when(FUELHEAT==1 ~ save.ng176.Standard,  # own savings if using standard...
                                             FUELHEAT==2 ~ save.prop.Standard,
                                             FUELHEAT==3 ~ save.fo.Standard, 
                                             FUELHEAT==5 ~ save.elhp.TOP.Standard,
                                             T ~ NA),
                # save.own.PROTO.Standard=    case_when(FUELHEAT==1 ~ save.ng176.Standard + save.elhp.PROTO.Standard,
                #                              FUELHEAT==2 ~ save.prop.Standard + save.elhp.PROTO.Standard,
                #                              FUELHEAT==3 ~ save.fo.Standard + save.elhp.PROTO.Standard,
                #                              FUELHEAT==5 ~ save.elhp.TOP.Standard + save.elhp.PROTO.Standard,
                #                              T ~ NA),
                save.own.PROTO.DOE.Standard=case_when(FUELHEAT==1 ~ save.ng176.Standard + save.elhp.PROTO.DOE.Standard,
                                             FUELHEAT==2 ~ save.prop.Standard  + save.elhp.PROTO.DOE.Standard,
                                             FUELHEAT==3 ~ save.fo.Standard  + save.elhp.PROTO.DOE.Standard,
                                             FUELHEAT==5 ~ save.elhp.TOP.Standard  + save.elhp.PROTO.DOE.Standard,
                                             T ~ NA))  %>%
  dplyr::mutate(HINC = relevel(HINC, '200k+'),
                RACE = factor(RACE, levels = c('White','Black','Asian','Native Am.','Other')),
                URBAN = factor(URBAN, levels = c('U','R')),
                HP_positive = save.own.TOP>0)  %>%
  dplyr::mutate(switchingCost = as.numeric(1/CPI[CPI$ym=='Jul 2015', 'CPI2019'])*
                                          case_when(FUELHEAT==1 ~ mean(c(6700, 4600)) - mean(c(2200, 4300)), #these are from EPA doc cited in paper
                                          FUELHEAT==2 ~ 5400 - mean(c(2600,1700)),
                                          FUELHEAT==3 ~ mean(c(5400, 7200)) - mean(c(4200, 6000)),
                                          FUELHEAT==5 ~ mean(c(3700,5300,3600,5100)) - mean(c(1700)))) %>%  # there isn't a diagonal "re-adopt" price here so 1700. Do I need to delete the others?
dplyr::mutate(save.own.TOP.net = save.own.TOP - switchingCost/(sum((1+rr)^(-1*(0:(ll-1))))),
              save.own.TOP.net.max = pmax(0, save.own.TOP.net),
              save.own.TOP.net.max.pct  = 100*save.own.TOP.net.max/pmax(0, HINCP),
              save.own.TOP.net.max.pctc = 100*save.own.TOP.net.max/pmax(5000, HINCP) )





PUMA.analysis.collapse  = PUMA.analysis %>%
  group_by(GEOID10) %>%
  dplyr::summarize(pred.mbtu = weighted.mean(pred.mbtu, w = WGTP),
                   save.prop = weighted.mean(save.prop, w = WGTP),
                   save.fo = weighted.mean(save.fo, w = WGTP),
                   save.ng176 = weighted.mean(save.ng176, w = WGTP),
                   save.elhp.TOP = mean(save.elhp.TOP, w = WGTP),
                   # save.elhp.PROTO = mean(save.elhp.PROTO, w = WGTP),
                   save.elhp.PROTO.DOE = mean(save.elhp.PROTO.DOE, w = WGTP),
                   composite.save3176.TOP = weighted.mean(save.own.TOP, w = WGTP),   ## The composite save measures are the "own" savings (adj. for percent elec. resist/elec hp)
                   composite.save3176.TOP.net = weighted.mean(save.own.TOP.net, w = WGTP),   ## 
                   composite.save3176.TOP.net.max = weighted.mean(save.own.TOP.net.max, w = WGTP), 
                   # composite.save3176.PROTO = weighted.mean(save.own.PROTO, w = WGTP),
                   composite.save3176.PROTO.DOE = weighted.mean(save.own.PROTO.DOE, w = WGTP), ######
                     pred.mbtu.Standard = weighted.mean(pred.mbtu.Standard, w = WGTP),
                     save.prop.Standard = weighted.mean(save.prop.Standard, w = WGTP),
                     save.fo.Standard = weighted.mean(save.fo.Standard, w = WGTP),
                     save.ng176.Standard = weighted.mean(save.ng176.Standard, w = WGTP),
                     save.elhp.TOP.Standard = mean(save.elhp.TOP.Standard, w = WGTP),
                     # save.elhp.PROTO.Standard = mean(save.elhp.PROTO.Standard, w = WGTP),
                     save.elhp.PROTO.DOE.Standard = mean(save.elhp.PROTO.DOE.Standard, w = WGTP),
                     composite.save3176.TOP.Standard = weighted.mean(save.own.TOP.Standard, w = WGTP),
                     # composite.save3176.PROTO.Standard = weighted.mean(save.own.PROTO.Standard, w = WGTP),
                     composite.save3176.PROTO.DOE.Standard = weighted.mean(save.own.PROTO.DOE.Standard, w = WGTP),
                       sng.analysis = weighted.mean(FUELHEAT==1, w = WGTP),
                       sprop.analysis = weighted.mean(FUELHEAT==2, w = WGTP),
                       sfo.analysis = weighted.mean(FUELHEAT==3, w = WGTP),
                       selec.analysis = weighted.mean(FUELHEAT==5, w = WGTP),
                       sother.analysis = weighted.mean(is.na(FUELHEAT), w = WGTP),
                         share_HP_positive = weighted.mean(save.own.TOP.net>0, w = WGTP))
                   

PUMA.analysis.collapse = PUMS.all %>%  # has PUMA-level shares as well missing, sng, sprop, ...
  left_join(PUMA.analysis.collapse %>% ungroup(), by = 'GEOID10')   # %>%
  # left_join(RECS.heatshares2[,.(IECC, shp, selr)], by = 'IECC') %>%  # collapsed without MONEYPY
  # dplyr::mutate(composite.save3176.TOP.oldway = sng*save.ng176.Standard +  # oldway is applying shares from census at PUMS level to PUMS-composite
  #                 sprop*save.prop.Standard +
  #                 selec*selr*save.elhp.TOP.Standard + selec*shp*0 + 
  #                 sfo*save.fo.Standard,
  #               composite.save3176.PROTO.oldway = sng*(save.ng176.Standard+save.elhp.PROTO.Standard) +
  #                 sprop*(save.prop.Standard+save.elhp.PROTO.Standard) +
  #                 selec*selr*(save.elhp.TOP.Standard+save.elhp.PROTO.Standard) + selec*shp*save.elhp.PROTO.Standard +
  #                 sfo*(save.fo.Standard+save.elhp.PROTO.Standard),
  #               composite.save3176.PROTO.DOE.oldway = sng*(save.ng176.Standard+save.elhp.PROTO.Standard+save.elhp.PROTO.DOE.Standard) +
  #                 sprop*(save.prop.Standard+save.elhp.PROTO.Standard+save.elhp.PROTO.DOE.Standard) +
  #                 selec*selr*(save.elhp.TOP.Standard+save.elhp.PROTO.Standard+save.elhp.PROTO.DOE.Standard) + selec*shp*(save.elhp.PROTO.Standard + save.elhp.PROTO.DOE.Standard) +
  #                 sfo*(save.fo.Standard+save.elhp.PROTO.Standard+save.elhp.PROTO.DOE.Standard))

PUMA.analysis.collapse = PUMA.analysis.collapse %>%
  st_transform(5070)




### Make Maps ####
### 

  
  fillram = scale_fill_viridis_c(option = 'magma', labels = dollar) 
  fillram600 = scale_fill_viridis_c(option= 'magma', labels = dollar, breaks = c(-600, -300, 0, 300, 600)) 
  colram = scale_color_viridis_c(option = 'magma')
  
  fillram.ratio = fillram.ratio01 = scale_fill_viridis_c(option = 'D')
  colram.ratio  = colram.ratio01 = scale_color_viridis_c(option = 'D')
  
  fillram.comma = scale_fill_viridis_c(option = 'magma', labels = comma) 
  colram.comma = scale_color_viridis_c(option = 'magma')
  
  g = ggplot(PUMA.analysis.collapse) + theme_void() + guides(col = 'none') + theme(legend.title = element_text(size = 8))   

  
  
## Most results are for UseGPSC==T (uses GPSC data)
## at end, will run again under if(UseGPSC==F) to output GA versions for Apx D
if(UseGPSC==T){
  
  ###
  ### Total cost of heating
  ### 
  # deleted as collapsing to PUMA makes this meaningless
  
  ###
  ### Savings from HPs over various technologies  
  ###
  elhpTOPsvgg = g +    geom_sf(aes(fill = save.elhp.TOP,  col = save.elhp.TOP), lwd=0)  + labs(fill = 'Annual top-selling hp savings\nover elec. resistance') + colram+ fillram
  ngassvgg176 = g + geom_sf(aes(fill = save.ng176,   col = save.ng176), lwd=0) + labs(fill = 'Annual hp savings\nfor top-selling hp\nover nat gas') + colram+ fillram
  propsvgg = g +    geom_sf(aes(fill = save.prop, col = save.prop), lwd=0)  + labs(fill = 'Annual hp savings\nfor top-selling hp\nover propane') + colram + fillram
  foilsvgg = g +    geom_sf(aes(fill = save.fo,   col = save.fo), lwd=0) + labs(fill = 'Annual hp savings\nfor top-selling hp\nover fuel oil') + colram+ fillram
  elhpPROTODOEsvgg = g + geom_sf(aes(fill = save.elhp.PROTO.DOE + save.elhp.TOP, col =  save.elhp.PROTO.DOE + save.elhp.TOP), lwd=0) +
    labs(fill = 'Annual DoE challenge cold-climate\nhp savings over\nelec.resistance') + 
    colram + fillram 
  
  ## Used in appendix
  ggsave(plot = elhpTOPsvgg, filename = file.path(FINAL.OUTPUT, 'PUMA_Savings_elresud.png'), width = 6, height = 4, units='in', dpi=300) # Apx ~Fig D.2
  ggsave(plot = ngassvgg176, filename = file.path(FINAL.OUTPUT, 'PUMA_Savings_ngud.png'), width = 6, height = 4, units='in', dpi=300)    # Apx ~Fig D.3
  ggsave(plot = propsvgg, filename = file.path(FINAL.OUTPUT, 'PUMA_Savings_propud.png'), width = 6, height = 4, units='in', dpi=300)     # Apx ~Fig D.4
  ggsave(plot = foilsvgg, filename = file.path(FINAL.OUTPUT, 'PUMA_Savings_foilud.png'), width = 6, height = 4, units='in', dpi=300)     # Apx ~Fig D.5
  ggsave(plot = elhpPROTODOEsvgg, filename = file.path(FINAL.OUTPUT, 'PUMA_Savings_DOEoverelres.png'), width = 6, height = 4, units='in', dpi=300) #--> Apx ~Fig D.8
  
  
  ### 
  ### 
  ### Composite Savings #----> The meat of the main analysis including Fig 7 in section 4
  ### 
  ### 
  
  #--> ~Fig 7
  compsvgg3176.TOP = g + geom_sf(aes(fill = composite.save3176.TOP, col = composite.save3176.TOP), lwd=0)  + labs(fill = 'Average annual\ntop-selling hp savings\nover current fuel by PUMA')  + colram + fillram600
  
  
  compsvgg3176.TOP.net = g + geom_sf(aes(fill = composite.save3176.TOP.net, col = composite.save3176.TOP.net), lwd=0)  + labs(fill = 'Average annual\nnet savings\ntop-selling hp\nover current fuel \nby PUMA')  + colram + fillram600
  
  compsvgg3176.TOP.net.share = g + geom_sf(aes(fill = share_HP_positive, col = share_HP_positive), lwd=0)  + labs(fill = 'Fraction of households\nwith positive\nnet benefits of adopting')  + colram.ratio01 + fillram.ratio01
  
  compsvgg3176.PROTO.DOE = g + geom_sf(aes(fill = composite.save3176.PROTO.DOE, col = composite.save3176.PROTO.DOE), lwd=0)  + labs(fill = 'Average annual DoE challenge\ncold-climate hp savings\nover current fuel by PUMA')  + colram + fillram
  
  compsvgg3176.PDT = g + geom_sf(aes(fill = composite.save3176.PROTO.DOE-composite.save3176.TOP, col = composite.save3176.PROTO.DOE-composite.save3176.TOP), lwd=0) +
    labs(fill = 'Annual composite savings:\n DoE challenge cold-climate\nover top-selling')  + colram + fillram

  #--> ~Apx Fig D.9
  DOEsave = compsvgg3176.PDT/compsvgg3176.PROTO.DOE   # composite DoE-CCHP savings in top, diff between DOE and top selling on bottom  ##--> ~Fig D.9
  
  ##
  ## Write out plots
  ## 
  
  ggsave(plot = compsvgg3176.TOP, filename = file.path(FINAL.OUTPUT, 'PUMA_Savings_3ud.png'), width = 6, height = 4, units='in', dpi=600, device = 'png')  ###-------> This is the main figure: ~Fig 7
  
  ggsave(plot = compsvgg3176.TOP.net, filename = file.path(FINAL.OUTPUT, 'PUMA_Savings_3udnet.png'), width = 6, height = 4, units='in', dpi=600, device = 'png')  ###-----> This is NET, Apx D, ~Fig D.15

  ggsave(plot = compsvgg3176.TOP.net.share, filename = file.path(FINAL.OUTPUT, 'PUMA_Savings_3udnetshare.png'), width = 6, height = 4, units='in', dpi=600, device = 'png') ##--> This is NET, App D, ~Fig D.12

  ggsave(plot = DOEsave, filename = file.path(FINAL.OUTPUT, 'DOEsave.png'), width=6, height = 8, units = 'in', dpi = 600) ###---> Appendix ~Fig D.7
  

  
  
  
    
  ###
  ### COP
  ### 
  
  
  avgCOP.TOP      = g + geom_sf(aes(fill = AWCOP65.TOP, col = AWCOP65.TOP), lwd=0) + labs(fill = 'Annual average\nHDD-weighted COP:\nTop-selling hp') 
  avgCOP.PROTO.DOE= g + geom_sf(aes(fill = AWCOP65.PROTO.DOE, col = AWCOP65.PROTO.DOE), lwd=1) + labs(fill = 'Annual average\nHDD-weighted COP:\nDoE challenge cold-climate hp') 
  avgCOP.PDT      = g + geom_sf(aes(fill = AWCOP65.PROTO.DOE/AWCOP65.TOP, col = AWCOP65.PROTO.DOE/AWCOP65.TOP), lwd=1) + labs(fill = 'Ratio of annual average\nHDD-weighted COP\nDoE challenge cold-climate to top-selling') + colram.ratio + fillram.ratio
  
  DOEcops = avgCOP.PROTO.DOE / avgCOP.PDT
  
  ggsave(plot = avgCOP.TOP, filename = file.path(FINAL.OUTPUT, 'AVGCOP_TOP.png'), width = 6, height = 4, units='in', dpi=600)  #-----> ~Fig B.2
  ggsave(plot = DOEcops, filename = file.path(FINAL.OUTPUT, 'DOEcops.png'), width = 6, height = 8, units='in', dpi=300)  #--> Apx D, ~Fig D.7
  
  
  
  
  
  ##
  ## Annual MMBTU and Prices
  ## 
  
  predmbtu = g + geom_sf(aes(fill = pred.mbtu/1000, col = pred.mbtu/1000), lwd=0)  + labs(fill = 'Predicted\nAnnual MMBTU') + fillram.comma + colram.comma
  ggsave(plot = predmbtu, filename = file.path(FINAL.OUTPUT, 'predmbtu.png'), width = 6, height = 4, units = 'in', dpi=600)  #---> Apx D, ~Fig D.1
  

  
  

  electricprices = g + geom_sf(aes(fill = cpkwh, col = cpkwh), lwd=0)  + labs(fill = 'Avg cost\nelectricity\nper kWh') + 
    scale_fill_gradient2(low = 'darkred', mid = 'white', high = 'darkblue', na.value = 'white', midpoint = 0, labels = dollar, limits = c(0, .35))  + 
    scale_color_gradient2(low = 'darkred', mid = 'white', high = 'darkblue', na.value = 'white', midpoint = 0,limits = c(0, .35))   
  ggsave(plot = electricprices, filename = file.path(FINAL.OUTPUT, 'electricprices.png'), width = 6, height = 4, units = 'in', dpi=600)  #--> Apx D, ~Fig D.6

} # end if(UseGPSC==T)


####
####
#### Regressions ####
#### 
#### 



seaborn6 =  c("#999999", "#E69F00", "#56B4E9",scales::alpha(c("#999999", "#E69F00", "#56B4E9"), .7))

incvars = incvarsa = c('HINC')
racvars = racvarsa = c('RACE')
urbvars = urbvarsa = c('URBAN')

###save.own.TOP is already weighted by heat pump shares for FUELHEAT==5




#### -----------> MAIN REGRESSION RESULTS
commod7 = feols(save.own.TOP ~ .[incvars] + .[racvars] + .[urbvars],      PUMA.analysis, cluster = ~ GEOID10, weights = ~WGTP) #**
commod8 = feols(save.own.TOP ~ .[incvars] + .[racvars] + .[urbvars] | ST, PUMA.analysis, cluster = ~ GEOID10, weights = ~WGTP) #**
commod9 = feols(save.own.TOP ~ .[incvars] + .[racvars] + .[urbvars] | GEOID10, PUMA.analysis, cluster = ~ GEOID10, weights = ~WGTP) #**
etable(commod7, commod8, commod9)

##----> max(0, savings)
commod7M = feols(save.own.TOP.max ~ .[incvars] + .[racvars] + .[urbvars],      PUMA.analysis, cluster = ~ GEOID10, weights = ~WGTP) #**
commod8M = feols(save.own.TOP.max ~ .[incvars] + .[racvars] + .[urbvars] | ST, PUMA.analysis, cluster = ~ GEOID10, weights = ~WGTP) #**
commod9M = feols(save.own.TOP.max ~ .[incvars] + .[racvars] + .[urbvars] | GEOID10, PUMA.analysis, cluster = ~ GEOID10, weights = ~WGTP) #**
etable(commod7M, commod8M, commod9M)

##---> max(0, savings)/income
commod7MP = feols(save.own.TOP.max.pctc ~ .[incvars] + .[racvars] + .[urbvars],      PUMA.analysis, cluster = ~ GEOID10, weights = ~WGTP) #**
commod8MP = feols(save.own.TOP.max.pctc ~ .[incvars] + .[racvars] + .[urbvars] | ST, PUMA.analysis, cluster = ~ GEOID10, weights = ~WGTP) #**
commod9MP = feols(save.own.TOP.max.pctc ~ .[incvars] + .[racvars] + .[urbvars] | GEOID10, PUMA.analysis, cluster = ~ GEOID10, weights = ~WGTP) #**
etable(commod7MP, commod8MP, commod9MP)


#########--> Net savings
##--> net savings
commod7N = feols(save.own.TOP.net ~ .[incvars] + .[racvars] + .[urbvars],      PUMA.analysis, cluster = ~ GEOID10, weights = ~WGTP) #**
commod8N = feols(save.own.TOP.net ~ .[incvars] + .[racvars] + .[urbvars] | ST, PUMA.analysis, cluster = ~ GEOID10, weights = ~WGTP) #**
commod9N = feols(save.own.TOP.net ~ .[incvars] + .[racvars] + .[urbvars] | GEOID10, PUMA.analysis, cluster = ~ GEOID10, weights = ~WGTP) #**
etable(commod7N, commod8N, commod9N)
print(rr)

##--> max(0, net savings)
commod7NM = feols(save.own.TOP.net.max ~ .[incvars] + .[racvars] + .[urbvars],      PUMA.analysis, cluster = ~ GEOID10, weights = ~WGTP) #**
commod8NM = feols(save.own.TOP.net.max ~ .[incvars] + .[racvars] + .[urbvars] | ST, PUMA.analysis, cluster = ~ GEOID10, weights = ~WGTP) #**
commod9NM = feols(save.own.TOP.net.max ~ .[incvars] + .[racvars] + .[urbvars] | GEOID10, PUMA.analysis, cluster = ~ GEOID10, weights = ~WGTP) #**
etable(commod7NM, commod8NM, commod9NM)

##--> max(0, net savings)/inc
commod7NMP = feols(save.own.TOP.net.max.pctc ~ .[incvars] + .[racvars] + .[urbvars],      PUMA.analysis, cluster = ~ GEOID10, weights = ~WGTP) #**
commod8NMP = feols(save.own.TOP.net.max.pctc ~ .[incvars] + .[racvars] + .[urbvars] | ST, PUMA.analysis, cluster = ~ GEOID10, weights = ~WGTP) #**
commod9NMP = feols(save.own.TOP.net.max.pctc ~ .[incvars] + .[racvars] + .[urbvars] | GEOID10, PUMA.analysis, cluster = ~ GEOID10, weights = ~WGTP) #**
etable(commod7NMP, commod8NMP, commod9NMP)



regdict = c('HINCLessthan10k' = 'Less than\n$10k',
            'HINC10-15k' = '$10-$15k',
            'HINC15-25k' = '$15-$25k',
            'HINC25-35k' = '$25-$35k',
            'HINC35-50k' = '$35-$50k',
            'HINC50-75k' = '$50-$75k',
            'HINC75-100k' = '$75-$100k',
            'HINC100-150k' = '$100-\n$150k',
            'HINC150-200k' = '$150-\n$200k',
            'RACEWhite' = 'White',
            'RACEBlack' = 'Black',
            'RACEAsian' = 'Asian',
            'RACENativeAm.' = 'Native Am.',
            'RACEOther' = 'Other',
            'URBANR' = 'Rural PUMA',
            'ST' = 'State',
            'GEOID10' = 'PUMA',
            'GEOID' = 'PUMA')

regdict2 = c('HINCLessthan10k' = 'Less than $10k',  # use for single-line Less than 10k
            'HINC10-15k' = '$10-$15k',
            'HINC15-25k' = '$15-$25k',
            'HINC25-35k' = '$25-$35k',
            'HINC35-50k' = '$35-$50k',
            'HINC50-75k' = '$50-$75k',
            'HINC75-100k' = '$75-$100k',
            'HINC100-150k' = '$100-$150k',
            'HINC150-200k' = '$150-$200k',
            'RACEWhite' = 'White',
            'RACEBlack' = 'Black',
            'RACEAsian' = 'Asian',
            'RACENativeAm.' = 'Native Am.',
            'RACEOther' = 'Other',
            'URBANR' = 'Rural PUMA',
            'ST' = 'State',
            'GEOID10' = 'PUMA',
            'GEOID' = 'PUMA')



st = style.tex(tablefoot.value = c('Clustered (PUMA) standard-errors in parentheses',
                                   'Signif. Codes: ^{***}: 0.01, ^{**}: 0.05, ^{*}: 0.1',
                                   'Omitted categories are income: \\$200k+, race: White'))




if(UseGPSC==T){

##------> savings & max(savings,0)   
## IN PAPER, main ~Fig 5
png(file.path(FINAL.OUTPUT,'fig_compreg3ud1max.png'), width = 7*260, height = 4*260, units = 'px', res=110)
coefplot(list(ols = commod7, stateFE = commod8, PUMAFE = commod9, 
              olsm = commod7M, stateFEM = commod8M, PUMAFEM = commod9M), pch=20,  drop = c('Constant|Intercept)'), dict = regdict, 
         x.shift = 3, sep = .1, cex = 1.1, ci.width = 0.02, pt.cex = 2.25,
         lwd = 3, ci.lty = c(rep(1,3), rep(1,3)), col = seaborn6, main = '')
abline(v = c(9.5, 13.5), col = 'gray50')
legend('top', col =  c(seaborn6[1:3],NA, 'gray50','gray50'), lty = c(rep(1,3),2,rep(1,3)), horiz = T, bg = 'white',
       pch=c(20,17,15,NA,20,21), cex = 1.1, pt.cex = 2,lwd=4, legend = c('No FE','State FE','PUMA FE','','Savings','Max(Savings,0)'))
dev.off()


## IN Apx D ~Tab D.2
etable(
  list(`1` = commod7, `2` = commod8, `3` = commod9,`4` = commod7M, `5` = commod8M, `6` = commod9M),
  headers = list("Savings" = 3, "Max\\{Savings, 0\\}" = 3), 
  dict = regdict2, tex = T, depvar = F, style.tex = st,
  label = paste0('tab:compreg3max',ifelse(UseGPSC==F, 'ga','')),
  title = 'Regression results: Savings and censored savings regressed on income, race, and urban/rural designation',
  file = file.path(FINAL.OUTPUT, paste0('tab_compreg3max', ifelse(UseGPSC==F, 'ga.tex','.tex'))), replace = T)




##-------> max(savings,0)/income (%)   # IN PAPER, main, ~Fig 6
png(file.path(FINAL.OUTPUT,'fig_compreg3ud1maxpct.png'), width = 7*260, height = 4*260, units = 'px', res=110)
coefplot(list(ols = commod7MP, stateFE = commod8MP, PUMAFE = commod9MP), pch=c(20,20,20),  drop = c('Constant|Intercept)'), dict = regdict, 
         x.shift = 1.5, sep = .15, cex = 1.1, ci.width = 0.02, pt.cex = 2.25,
         lwd = 3, ci.lty = c(rep(1,3), rep(1,3)), col = seaborn6, main = '', ylab = 'Relative savings as % of income x100')
legend('top', horiz=T, col = seaborn6, pch=c(20,17,15), pt.cex = 2, lwd=4, legend = c('No FE','State FE','PUMA FE'),bg = 'white')
abline(v = c(9.5, 13.5), col = 'gray50')
dev.off()



## In Apx D ~Tab D.3
etable(list(`1` = commod7MP,`2` = commod8MP, `3` = commod9MP), dict = regdict2, tex = T, depvar = F, style.tex = st,
       label = paste0('tab:compreg3maxpct',ifelse(UseGPSC==F, 'ga','')),
       title = 'Regression results: censored savings (max\\{0, savings\\}) from heat pump by income, race, and urban/rural designation, expressed as percent of income in percentage points',
       file = file.path(FINAL.OUTPUT, paste0('tab_compreg3maxpct', ifelse(UseGPSC==F, 'ga.tex','.tex'))), replace = T)







########---> NET OF SWITCHING SAVINGS
##------> net savings & max(net savings,0)    # IN Apx D, ~Fig D.13
png(file.path(FINAL.OUTPUT,'fig_compreg3ud1netmax.png'), width = 7*260, height = 4*260, units = 'px', res=110)
coefplot(list(ols = commod7N, stateFE = commod8N, PUMAFE = commod9N, 
              olsm = commod7NM, stateFEM = commod8NM, PUMAFEM = commod9NM), pch=20,  drop = c('Constant|Intercept)'), dict = regdict, 
         x.shift = 3, sep = .1, cex = 1.1, ci.width = 0.02, pt.cex = 2.25,
         lwd = 3, ci.lty = c(rep(1,3), rep(1,3)), col = seaborn6, main = '')
abline(v = c(9.5, 13.5), col = 'gray50')
legend('top', col =  c(seaborn6[1:3],NA, 'gray50','gray50'), lty = c(rep(1,3),2,rep(1,3)), horiz = T, bg = 'white',
       pch=c(20,17,15,NA,20,21), cex = 1.1, pt.cex = 2,lwd=4, legend = c('No FE','State FE','PUMA FE','','Net savings','Max(Net savings,0)'))
dev.off()

## Apx D, ~Tab D.5	
etable(
  list(`1` = commod7N, `2` = commod8N, `3` = commod9N,`4` = commod7NM, `5` = commod8NM, `6` = commod9NM),
  headers = list("Savings" = 3, "Max\\{Savings, 0\\}" = 3), 
  dict = regdict2, tex = T, depvar = F, style.tex = st,
  label = paste0('tab:compreg3netmax',ifelse(UseGPSC==F, 'ga','')),
  title = 'Regression results: Net savings and censored net savings regressed on income, race, and urban/rural designation',
  file = file.path(FINAL.OUTPUT, 'tab_compreg3netmax.tex'), replace = T)


## IN Apx ~Fig D.14
##-------> max(net savings,0)/income (%)  
png(file.path(FINAL.OUTPUT,'fig_compreg3ud1netmaxpct.png'), width = 7*260, height = 4*260, units = 'px', res=110)
coefplot(list(ols = commod7NMP, stateFE = commod8NMP, PUMAFE = commod9NMP), pch=c(20,20,20),  drop = c('Constant|Intercept)'), dict = regdict, 
         x.shift = 1.5, sep = .15, cex = 1.1, ci.width = 0.02, pt.cex = 2.25,
         lwd = 3, ci.lty = c(rep(1,3), rep(1,3)), col = seaborn6, main = '', ylab = 'Relative net savings as % of income x100')
legend('top', horiz=T, col = seaborn6, pch=c(20,17,15), pt.cex = 2, lwd=4, legend = c('No FE','State FE','PUMA FE'),bg = 'white')
abline(v = c(9.5, 13.5), col = 'gray50')
dev.off()

## Apx D, ~Tab D.6
etable(list(`1` = commod7NMP,`2` = commod8NMP, `3` = commod9NMP), dict = regdict2, tex = T, depvar = F, style.tex = st,
       label = paste0('tab:compreg3netmaxpct',ifelse(UseGPSC==F, 'ga','')),
       title = 'Regression results: censored net savings (max\\{0, net savings\\}) from heat pump by income, race, and urban/rural designation, expressed as percent of income in percentage points',
       file = file.path(FINAL.OUTPUT, 'tab_compreg3netmaxpct.tex'), replace = T)





          
          
          
          
          
          
          
          
          
######################################
### DoE Challenge regressions D2.2 ###


PUMA.analysis = PUMA.analysis %>% 
  dplyr::mutate(save.own.INCREMENTAL = save.own.PROTO.DOE - save.own.TOP)

challenge_total7 = feols(save.own.PROTO.DOE ~ .[incvars] + .[racvars] + .[urbvars], PUMA.analysis, cluster = ~ GEOID10, weights = ~WGTP)
challenge_total8 = feols(save.own.PROTO.DOE ~ .[incvars] + .[racvars] + .[urbvars] | ST, PUMA.analysis, cluster = ~ GEOID10, weights = ~WGTP)
challenge_total9 = feols(save.own.PROTO.DOE ~ .[incvars] + .[racvars] + .[urbvars] | GEOID10, PUMA.analysis, cluster = ~ GEOID10, weights = ~WGTP)

challenge_incremental7 = feols(save.own.INCREMENTAL ~ .[incvars] + .[racvars] + .[urbvars], PUMA.analysis, cluster = ~ GEOID10, weights = ~WGTP)
challenge_incremental8 = feols(save.own.INCREMENTAL ~ .[incvars] + .[racvars] + .[urbvars] | ST, PUMA.analysis, cluster = ~ GEOID10, weights = ~WGTP)
challenge_incremental9 = feols(save.own.INCREMENTAL ~ .[incvars] + .[racvars] + .[urbvars] | GEOID10, PUMA.analysis, cluster = ~ GEOID10, weights = ~WGTP)


## IN Apx D, ~Fig D.10
png(file.path(FINAL.OUTPUT,'fig_compreg4total.png'), width = 7*260, height = 4*260, units = 'px', res=110)
coefplot(list(ols = challenge_total7, stateFE = challenge_total8, PUMAFE = challenge_total9), pch=20,  drop = c('Constant|Intercept)'), dict = regdict, 
         lwd =3, col = c("#999999", "#E69F00", "#56B4E9"), main = '')
legend('topright', col =  c("#999999", "#E69F00", "#56B4E9"), pch=20, lwd=3, legend = c('No FE','State FE','PUMA FE'))
# title(sub = 'Base categories: >$200k; White')
abline(v = c(9.5, 13.5), col = 'gray50')
dev.off()

## In Apx D, ~Fig D.11
png(file.path(FINAL.OUTPUT,'fig_compreg4incremental.png'), width = 7*260, height = 4*260, units = 'px', res=110)
coefplot(list(ols = challenge_incremental7, stateFE = challenge_incremental8, PUMAFE = challenge_incremental9), pch=20,  drop = c('Constant|Intercept)'), dict = regdict, 
         lwd =3, col = c("#999999", "#E69F00", "#56B4E9"), main = '')
legend('topright', col =  c("#999999", "#E69F00", "#56B4E9"), pch=20, lwd=3, legend = c('No FE','State FE','PUMA FE'))
# title(sub = 'Base categories: >$200k; White')
abline(v = c(9.5, 13.5), col = 'gray50')
dev.off()


## In Apx D, ~Tab D.4
etable(list(`1` = challenge_total7, `2` = challenge_total8, `3` = challenge_total9, `4` = challenge_incremental7, `5` = challenge_incremental8, `6` = challenge_incremental9), 
       dict = regdict2, tex = T, depvar = F, style.tex = st,
       label = 'tab:compreg4',
       headers = list('Composite savings'=3, 'Incremental savings' = 3),
       title = 'Regression results: composite savings and incremental savings from DoE Challenge heat pump by income, race, and urban/rural designation',
       file = file.path(FINAL.OUTPUT, 'tab_compreg4.tex'), replace = T)



jStripNames<-function(var, sus){
  replace.var = unique(PUMA.analysis %>% dplyr::pull(!!var))
  rvn = paste0('as.factor(',var,')', gsub(pattern = ' ', replacement = '', replace.var))
  replace.var = paste0(sus, replace.var)
  names(replace.var) = rvn
  replace.var
}

reg.money = paste0('Income: ', regdict2[2:8])
names(reg.money) = paste0('as.factor(MONEYPY)',2:8)

regdict3 = c(
  'TOTALBTUSPHuse' = 'Total Usable MBTU',
  regdict2, reg.money,
  jStripNames('YEARMADERANGE', 'Year Built: '), jStripNames('IECC', 'IECC: '),
  jStripNames('BEDROOMS', 'Bedrooms: '), jStripNames('TOTROOMS', 'Total Rooms: '),
  'as.factor(FUELHEAT)2' = 'Heating Fuel: Propane',
  'as.factor(FUELHEAT)3' = 'Heating Fuel: Fuel Oil',
  'as.factor(FUELHEAT)5' = 'Heating Fuel: Electricity'
)


### Usable MBTU Regression Model ###
##  IN APPENDIX D, ~Tab D.1
etable(list(`1` = pmod1), 
       dict = regdict3, depvar = T, tex = T, se.below = F, fontsize = 'small',
       cluster = ~IECC,
       digits = 3, label = 'tab:mbtureg1',
       title = 'Regression results: Usable MBTU on observable household and home characteristics',
       file = file.path(FINAL.OUTPUT, 'tab_mbtureg1.tex'),
       replace = T)


} # end if(UseGPSC==T)



##--> IN APPENDIX NO GPSC
if(UseGPSC==F){
  ga = ggplot(PUMA.analysis.collapse %>% dplyr::filter(State=='GA')) + theme_void() + guides(col = 'none') + 
    theme(legend.title = element_text(size = 8), legend.position = 'bottom', legend.direction = 'horizontal')  
  
  compsvgg3176.TOPga = ga + geom_sf(aes(fill = composite.save3176.TOP, col = composite.save3176.TOP), lwd=0)  + labs(fill = 'Average annual top-selling hp savings\nover current fuel by PUMA')  + colram + fillram
  


  ##---> App D, ~Fig D.16
  ggsave(plot = compsvgg3176.TOPga, filename = file.path(FINAL.OUTPUT, 'PUMA_Savings_3udga.png'), width = 4, height = 5, units='in', dpi=600, device = 'png')  
  
  
  ## In Apx D ~Tab D.7 (when UseGPSC==T)
  etable(
    list(`1` = commod7, `2` = commod8, `3` = commod9,`4` = commod7M, `5` = commod8M, `6` = commod9M),
    headers = list("Savings" = 3, "Max\\{Savings, 0\\}" = 3), 
    dict = regdict2, tex = T, depvar = F, style.tex = st,
    label = paste0('tab:compreg3max',ifelse(UseGPSC==F, 'ga','')),
    title = 'Regression results: Savings and censored savings regressed on income, race, and urban/rural designation',
    file = file.path(FINAL.OUTPUT, paste0('tab_compreg3max', ifelse(UseGPSC==F, 'ga.tex','.tex'))), replace = T)
  
  
  ## In Apx D ~ Tab D.8 (when UseGPSC==T)
  etable(list(`1` = commod7MP,`2` = commod8MP, `3` = commod9MP), dict = regdict2, tex = T, depvar = F, style.tex = st,
         label = paste0('tab:compreg3maxpct',ifelse(UseGPSC==F, 'ga','')),
         title = 'Regression results: censored savings (max\\{0, savings\\}) from heat pump by income, race, and urban/rural designation, expressed as percent of income in percentage points',
         file = file.path(FINAL.OUTPUT, paste0('tab_compreg3maxpct', ifelse(UseGPSC==F, 'ga.tex','.tex'))), replace = T)

}





######
######
#### AHRI plots ####
######
###### 
# From AHRI website archives: https://www.ahrinet.org/analytics/statistics/monthly-shipments

AHRI_fornow = tribble( ~ Year, ~GasWarmAirFurnaces, ~OilWarmAirFurnaces, ~AC, ~HeatPump, ~WaterHeaterGas, ~WaterHeaterElectric,
                       2009, 2174528, 55619, 3515648, 1642064, 3760657, 3751994,
                       2010, 2453279, 56445, 3419506, 1747979, 3918150, 3936597,
                       2011, 2216160, 48247, 3744691, 1765002, 3953113, 3738882,
                       2012, 2243379, 35980, 3915869, 1697796, 3959444, 3733988,
                       2013, 2601760, 32144, 4201068, 1968632, 4282104, 4008478,
                       2014, 2734713, 34725, 4499660, 2353990, 4471903, 4277329,
                       2015, 2814203, 38181, 4545876, 2269196, 4374199, 4027067,
                       2016, 2942545, 37743, 4900992, 2429867, 4208984, 3937936,
                       2017, 3133768, 37268, 5185747, 2619782, 4359297, 4127302,
                       2018, 3416571, 38429, 5399760, 2940502, 4521373, 4229912,
                       2019, 3441872, 40692, 5359775, 3109840, 4377001, 4201274,
                       2020, 3351176, 36505, 5910284, 3418478, 4584367, 4653688,
                       2021, 4008894, 39548, 6282285, 3916766, 4967079, 4880746)  # updated with Dec 2021 report (w/final 2021 numbers) on 3-11-22

AHRI_long = AHRI_fornow %>% 
  pivot_longer(cols = GasWarmAirFurnaces:WaterHeaterElectric, names_to = 'Appliance') %>%
  dplyr::mutate(Type = case_when(grepl('Furnace', Appliance) ~ 'Furnace',
                                 Appliance %in% c('AC','HeatPump') ~ 'AC or Heat Pump',
                                 grepl('WaterHeater', Appliance) ~ 'Water Heater')) %>%
  group_by(Appliance) %>% 
  dplyr::mutate(value.relative.09 = value - value[Year==2009])


AHRI_shares = AHRI_fornow %>% 
  dplyr::mutate(WaterHeaterShareElectric = WaterHeaterElectric / (WaterHeaterElectric + WaterHeaterGas),
                FurnaceShareHeatPump = HeatPump / (GasWarmAirFurnaces + OilWarmAirFurnaces + HeatPump),
                CoolingShareHeatPump = HeatPump / (AC + HeatPump)) %>%  # Note that Heat Pumps are both Cooling and Furnace.
  dplyr::select(Year, contains('Share')) %>%
  pivot_longer(cols = contains('Share'), names_to = 'Share') %>%
  dplyr::mutate(Type = case_when(grepl('Furnace', Share) ~ 'Furnace',
                                 grepl('Cooling', Share) ~ 'Cooling',
                                 grepl('WaterHeater', Share) ~ 'Water Heater'))




##--> Apx A, ~Fig A.1
AnnualShipments = ggplot(AHRI_long, aes(x = Year, y = value, col = Appliance)) + # coord_cartesian(ylim = c(0, max(AHRI_long$value))) +
  geom_path() + facet_wrap(~Appliance, scale = 'free_y') + theme_bw() + theme(legend.position = 'none') +
  scale_y_continuous(labels = comma) + 
  labs(subtitle = 'Number of Appliances Shipped in US by type excluding window-mount AC', caption = 'Source: Air-Conditioning, Heating, and Refrigeration Instutite (AHRI.org)')

ggsave(plot = AnnualShipments, filename = file.path(FINAL.OUTPUT, 'PlotAnnualShipments-1.png'), width = 4, height = 5, units='in', dpi=600, device = 'png')  



##--> Apx A, ~Fig A.2
AnnualShipments.09 = ggplot(AHRI_long, aes(x = Year, y = value.relative.09, col = Appliance)) + # coord_cartesian(ylim = c(0, max(AHRI_long$value))) +
  geom_path() + 
  # facet_wrap(~Appliance, scale = 'free_y') + 
  labs(y = 'Number shipped relative to 2009') + 
  theme_bw() + theme(legend.position = 'none') +
  geom_label_repel(data = AHRI_long %>% dplyr::filter(Year==2020), aes(label = Appliance)) +
  scale_y_continuous(labels = comma)

ggsave(plot = AnnualShipments.09, filename = file.path(FINAL.OUTPUT, 'PlotAnnualChangeShipments.png'), width = 4, height = 5, units='in', dpi=600, device = 'png')  



##--> Apx A, ~Fig A.3
AnnualShares = ggplot(AHRI_shares %>% dplyr::filter(Type!='Water Heater'), aes(x = Year, y = value, col = Share)) + 
  geom_path() + facet_wrap(~Type, scale = 'fixed') + theme_bw() + theme(legend.position = 'none') + 
  scale_y_continuous(labels = percent) + 
  labs(subtitle = 'Source: Air-Conditioning, Heating, and Refrigeration Instutite (AHRI.org)',
       y = 'Share')

ggsave(plot = AnnualShares, filename = file.path(FINAL.OUTPUT, 'PlotAnnualShares-1.png'), width = 4, height = 5, units='in', dpi=600, device = 'png') 



